Multimodal Representation and Retrieval


Visual Adaptive Prompting for Compositional Zero-Shot Learning
Kyle Stein,
Andrew Arash Mahyari,
Guillermo Francia,
Eman El-Sheikh
[pdf] [arXiv]
[bibtex]
@InProceedings{Stein_2025_ICCV,
  author    = {Stein, Kyle and Mahyari, Andrew Arash and Francia, Guillermo and El-Sheikh, Eman},
  title     = {Visual Adaptive Prompting for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4137--4146},
}

Smart Routing for Multimodal Video Retrieval: When to Search What
Kevin Dela Rosa
[pdf] [arXiv]
[bibtex]
@InProceedings{Rosa_2025_ICCV,
  author    = {Rosa, Kevin Dela},
  title     = {Smart Routing for Multimodal Video Retrieval: When to Search What},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4118--4126},
}

Refining Skewed Perceptions in Vision-Language Contrastive Models through Visual Representations
Haocheng Dai,
Sarang Joshi
[pdf] [arXiv]
[bibtex]
@InProceedings{Dai_2025_ICCV,
  author    = {Dai, Haocheng and Joshi, Sarang},
  title     = {Refining Skewed Perceptions in Vision-Language Contrastive Models through Visual Representations},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4071--4080},
}

Chrono: A Simple Blueprint for Representing Time in MLLMs
Boris Meinardus,
Hector G. Rodriguez,
Anil Batra,
Anna Rohrbach,
Marcus Rohrbach
[pdf] [arXiv]
[bibtex]
@InProceedings{Meinardus_2025_ICCV,
  author    = {Meinardus, Boris and Rodriguez, Hector G. and Batra, Anil and Rohrbach, Anna and Rohrbach, Marcus},
  title     = {Chrono: A Simple Blueprint for Representing Time in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4092--4097},
}

Rate-Distortion Limits for Multimodal Retrieval: Theory, Optimal Codes, and Finite-Sample Guarantees
Thomas Y. Chen
[pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2025_ICCV,
  author    = {Chen, Thomas Y.},
  title     = {Rate-Distortion Limits for Multimodal Retrieval: Theory, Optimal Codes, and Finite-Sample Guarantees},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4147--4156},
}

IRR-LMM: Improving On-demand Retail Recommendation with Large Multi-Modal Models
Yihao Zhao,
Nan Lai,
Xiaoming Li,
Xu Yan,
Wenhao Deng,
Hujiang Huang,
Shuai Zhang,
Wei Lin
[pdf]
[bibtex]
@InProceedings{Zhao_2025_ICCV,
  author    = {Zhao, Yihao and Lai, Nan and Li, Xiaoming and Yan, Xu and Deng, Wenhao and Huang, Hujiang and Zhang, Shuai and Lin, Wei},
  title     = {{IRR-LMM}: Improving On-demand Retail Recommendation with Large Multi-Modal Models},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4127--4136},
}

MIND-RAG: Multimodal Context-Aware and Intent-Aware Retrieval-Augmented Generation for Educational Publications
Jiayang Yu,
Yuxi Xie,
Guixuan Zhang,
Jie Liu,
Zhi Zeng,
Ying Huang,
Shuwu Zhang
[pdf]
[bibtex]
@InProceedings{Yu_2025_ICCV,
  author    = {Yu, Jiayang and Xie, Yuxi and Zhang, Guixuan and Liu, Jie and Zeng, Zhi and Huang, Ying and Zhang, Shuwu},
  title     = {{MIND-RAG}: Multimodal Context-Aware and Intent-Aware Retrieval-Augmented Generation for Educational Publications},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4157--4164},
}

Towards Reporting Bias in Visual-Language Datasets: Bi-modal Data Augmentation by Decoupling Object-Attribute Association
Qiyu Wu,
Mengjie Zhao,
Yutong He,
Lang Huang,
Junya Ono,
Hiromi Wakaki,
Yuki Mitsufuji
[pdf]
[bibtex]
@InProceedings{Wu_2025_ICCV,
  author    = {Wu, Qiyu and Zhao, Mengjie and He, Yutong and Huang, Lang and Ono, Junya and Wakaki, Hiromi and Mitsufuji, Yuki},
  title     = {Towards Reporting Bias in Visual-Language Datasets: Bi-modal Data Augmentation by Decoupling Object-Attribute Association},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4098--4107},
}

Document Haystack: A Long Context Multimodal Image/Document Understanding Vision LLM Benchmark
Goeric Huybrechts,
Srikanth Ronanki,
Sai Muralidhar Jayanthi,
Jack Fitzgerald,
Srinivasan Veeravanallur
[pdf] [arXiv]
[bibtex]
@InProceedings{Huybrechts_2025_ICCV,
  author    = {Huybrechts, Goeric and Ronanki, Srikanth and Jayanthi, Sai Muralidhar and Fitzgerald, Jack and Veeravanallur, Srinivasan},
  title     = {Document Haystack: A Long Context Multimodal Image/Document Understanding Vision {LLM} Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4062--4070},
}

Global-to-Local or Local-to-Global? Enhancing Image Retrieval with Efficient Local Search and Effective Global Re-ranking
Dror Aiger,
Bingyi Cao,
Kaifeng Chen,
Andre Araujo
[pdf] [arXiv]
[bibtex]
@InProceedings{Aiger_2025_ICCV,
  author    = {Aiger, Dror and Cao, Bingyi and Chen, Kaifeng and Araujo, Andre},
  title     = {Global-to-Local or Local-to-Global? Enhancing Image Retrieval with Efficient Local Search and Effective Global Re-ranking},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4108--4117},
}

Med-GRIM: Enhanced Zero-Shot Medical VQA using prompt-embedded Multimodal Graph RAG
Rakesh Raj Madavan,
Akshat Kaimal,
Hashim Faisal,
Chandrakala S
[pdf]
[bibtex]
@InProceedings{Madavan_2025_ICCV,
  author    = {Madavan, Rakesh Raj and Kaimal, Akshat and Faisal, Hashim and S, Chandrakala},
  title     = {{Med-GRIM}: Enhanced Zero-Shot Medical {VQA} using prompt-embedded Multimodal Graph {RAG}},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision ({ICCV}) Workshops},
  month     = oct,
  year      = {2025},
  pages     = {4081--4091},
}